##################################################
# Plot: Potential outcomes in a randomized experiment
##################################################

# Simulate potential outcomes for n people and build the data frame `dat`
# that all figures below draw from.

# Fix the RNG state so the simulated data are reproducible
set.seed(12345)

# Sample size
n <- 5000

# Outcomes in control condition
p0 <- rnorm(n, mean = 1, sd = 1)

# Individual-level causal effects.
# p0 is subtracted here and added back when p1 is formed below, so p1 ends up
# being the fresh normal draw (mean 3, sd 1) shifted by 1.
d <- rnorm(n, mean = 3, sd = 1) - p0 + 1

# Outcomes in experimental condition
p1 <- p0 + d

# Pick the colors for control, effects, and experimental condition
col_p0 <- "#CC79A7"
col_d <- "#56B4E9"
col_p1 <- "#E69F00"

# When plotting the potential outcomes together with the effects, each person
# needs a vertical position on the density scale. Positions are spread evenly
# from 0 up to the following value.
y_upper <- .5

# Generate dataframe
dat <- data.frame(p0, d, p1)

# Vertical placement of each person when plotting the individual effects
dat$count <- seq(from = 0, to = y_upper, length.out = nrow(dat))

# We will only plot a selection (inc = 1) to reduce clutter:
# the first row and every `plot_every`-th row after it.
# The indicator is derived from the row index rather than from
# rep(..., times = n / plot_every), so it always has exactly nrow(dat)
# elements, even when n is not a multiple of plot_every.
plot_every <- 100
dat$inc <- as.numeric((seq_len(nrow(dat)) - 1) %% plot_every == 0)
# inc_all marks all points we plot when going to the top of the panel
dat$inc_all <- dat$inc
# inc marks the points we plot when we overlay the densities;
# here we do not go all the way up, to prevent clutter
dat$inc[dat$count > 0.3] <- 0
# Transparency parameter (1 at the first row, 0 at the last) that makes the
# potential outcomes and individual effects fade when combined with the
# density distributions
dat$trans <- seq(from = 1, to = 0, length.out = nrow(dat))

# Get the plotting action started  
library(ggplot2)


# Figure 1: individual-level effects.
# Each displayed person (inc_all == 1) sits at height `count`; a horizontal
# segment connects the outcome under control (p0) to the outcome under
# treatment (p1). The result is written to "1.png".
ggplot(data = dat[dat$inc_all == 1, ]) +
  # lines for the causal effects; yend is given explicitly because ggplot2
  # releases before 3.5.0 require it for geom_segment()
  geom_segment(
    aes(x = p0, xend = p1, y = count, yend = count),
    color = col_d
  ) +
  # outcome in control condition
  geom_point(aes(x = p0, y = count), color = col_p0) +
  # outcome in experimental condition
  geom_point(aes(x = p1, y = count), color = col_p1) +
  # scale limits (zoom only, no data are dropped)
  coord_cartesian(xlim = c(-2, 7), ylim = c(0, y_upper)) +
  # make it pretty: the y axis only encodes stacking order, so hide its
  # text and ticks
  theme_classic() +
  labs(x = "Outcome", y = "") +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
ggsave("1.png", width = 4, height = 4)

# Figure 2: distribution of the individual-level causal effects `d`,
# with a dashed vertical line at their mean. Written to "2.png".
ggplot(dat, mapping = aes(x = d)) +
  # smoothed density of the effects
  geom_density(colour = col_d, fill = col_d, alpha = 0.2) +
  # dashed marker at the average effect
  geom_vline(
    xintercept = mean(dat$d),
    linetype = "dashed",
    colour = col_d
  ) +
  # axis titles
  labs(x = "Individual-level causal effect", y = "Density") +
  # same viewing window as the other figures
  coord_cartesian(ylim = c(0, y_upper), xlim = c(-2, 7)) +
  # minimal look; density values themselves are not shown on the y axis
  theme_classic() +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "none"
  )
ggsave(filename = "2.png", height = 4, width = 4)

# Figure 3: densities of both potential outcomes over all rows of `dat`,
# overlaid with a fading selection (inc == 1) of individual outcomes and
# effects. Written to "3.png".
ggplot(data = dat) +
  # control condition
  geom_density(aes(x = p0), color = col_p0, fill = col_p0, alpha = .2) +
  # average outcome in control condition
  geom_vline(
    xintercept = mean(dat$p0), color = col_p0, linetype = "dashed"
  ) +
  # experimental condition
  geom_density(aes(x = p1), color = col_p1, fill = col_p1, alpha = .2) +
  # average outcome in experimental condition
  geom_vline(
    xintercept = mean(dat$p1), color = col_p1, linetype = "dashed"
  ) +
  # individual-level causal effects; yend is given explicitly because
  # ggplot2 releases before 3.5.0 require it for geom_segment()
  geom_segment(
    data = dat[dat$inc == 1, ],
    aes(x = p0, xend = p1, y = count, yend = count, alpha = trans),
    color = col_d
  ) +
  # individual outcomes in control condition
  geom_point(
    data = dat[dat$inc == 1, ],
    aes(x = p0, y = count, alpha = trans),
    color = col_p0
  ) +
  # individual outcomes in experimental condition
  geom_point(
    data = dat[dat$inc == 1, ],
    aes(x = p1, y = count, alpha = trans),
    color = col_p1
  ) +
  # coordinates and prettiness; the alpha legend is suppressed below
  coord_cartesian(xlim = c(-2, 7), ylim = c(0, y_upper)) +
  theme_classic() +
  labs(x = "Outcome", y = "Density") +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
ggsave("3.png", width = 4, height = 4)

# Randomly assign each person to a condition with probability .5
# (independent Bernoulli draws, so the groups are only approximately equal
# in size). 0 = control, 1 = experimental, as used in the subsets below.
set.seed(12345)
dat$condition <- rbinom(n, size = 1, prob = .5)

# Figure 4: the observed data in an experiment.
# Every displayed person keeps their effect segment, but only the outcome of
# the condition they were assigned to is drawn as a point.
# Written to "4.png".
ggplot(data = dat) +
  # slightly transparent individual-level causal effects; yend is given
  # explicitly because ggplot2 releases before 3.5.0 require it
  geom_segment(
    data = dat[dat$inc_all == 1, ],
    aes(x = p0, xend = p1, y = count, yend = count),
    color = col_d, alpha = .4
  ) +
  # observed outcome in control condition
  geom_point(
    data = dat[dat$inc_all == 1 & dat$condition == 0, ],
    aes(x = p0, y = count),
    color = col_p0
  ) +
  # observed outcome in experimental condition
  geom_point(
    data = dat[dat$inc_all == 1 & dat$condition == 1, ],
    aes(x = p1, y = count),
    color = col_p1
  ) +
  # coordinates and prettiness
  coord_cartesian(xlim = c(-2, 7), ylim = c(0, y_upper)) +
  theme_classic() +
  labs(x = "Outcome", y = "") +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
ggsave("4.png", width = 4, height = 4)


# Figure 5: density distributions of the observed outcomes.
# The control density and mean use p0 of the rows with condition == 0; the
# experimental density and mean use p1 of the rows with condition == 1.
# A selection of individual effects and observed outcomes (inc == 1) is
# overlaid. Written to "5.png".
ggplot(data = dat) +
  # density in control condition
  geom_density(
    data = dat[dat$condition == 0, ],
    aes(x = p0),
    color = col_p0, fill = col_p0, alpha = .2
  ) +
  # average in control condition
  geom_vline(
    xintercept = mean(dat[dat$condition == 0, "p0"]),
    color = col_p0, linetype = "dashed"
  ) +
  # density in experimental condition
  geom_density(
    data = dat[dat$condition == 1, ],
    aes(x = p1),
    color = col_p1, fill = col_p1, alpha = .2
  ) +
  # average in experimental condition: must be taken over the rows assigned
  # to the experimental condition (condition == 1) so that it matches the
  # density drawn above
  geom_vline(
    xintercept = mean(dat[dat$condition == 1, "p1"]),
    color = col_p1, linetype = "dashed"
  ) +
  # individual-level causal effects; yend is given explicitly because
  # ggplot2 releases before 3.5.0 require it for geom_segment()
  geom_segment(
    data = dat[dat$inc == 1, ],
    aes(x = p0, xend = p1, y = count, yend = count),
    color = col_d, alpha = .4
  ) +
  # observed outcome in control condition
  geom_point(
    data = dat[dat$inc == 1 & dat$condition == 0, ],
    aes(x = p0, y = count, alpha = trans),
    color = col_p0
  ) +
  # observed outcome in experimental condition
  geom_point(
    data = dat[dat$inc == 1 & dat$condition == 1, ],
    aes(x = p1, y = count, alpha = trans),
    color = col_p1
  ) +
  # make it look nice; the alpha legend is suppressed below
  coord_cartesian(xlim = c(-2, 7), ylim = c(0, y_upper)) +
  theme_classic() +
  labs(x = "Outcome", y = "Density") +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
ggsave("5.png", width = 4, height = 4)
